# import magrittr for piping
library(magrittr)
library(readr)
library(dplyr)
library(plotly)
# Load the ATP match data from the CSV in the working directory.
atp_table <- readr::read_csv(file = "atp_tennis.csv")
Rows: 25362 Columns: 17── Column specification ─────────────────────────────────────────────────────────
Delimiter: ","
chr (9): Tournament, Series, Court, Surface, Round, Player_1, Player_2, Winn...
dbl (7): Best of, Rank_1, Rank_2, Pts_1, Pts_2, Odd_1, Odd_2
date (1): Date
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Peek at the first rows of the match table.
atp_table %>% head()
NA
# plotly test: scatter of the two players' ranks for matches before 2013.
# Hover text is Player_1 and the marker size is taken from Odd_2.
atp_dated <- filter(atp_table, Date < "2013-01-01")
plot_ly(
  data = atp_dated,
  x = ~Rank_1,
  y = ~Rank_2,
  text = ~Player_1,
  type = "scatter",
  mode = "markers",
  marker = list(size = ~Odd_2, opacity = 0.5)
)
# Build a "long" match table in which every match appears twice, once from
# each player's point of view, so that filtering on Player_1 finds all of a
# player's matches regardless of which column they were originally listed in.
player_name <- "Djokovic N."

# Mirror each match. Every per-player column has to be swapped together with
# the names; swapping only Player_1/Player_2 would leave Rank_1, Pts_1 and
# Odd_1 describing the *other* player in the mirrored rows.
atp_table_swapped <- atp_table %>%
  rename(
    Player_1 = Player_2, Player_2 = Player_1,
    Rank_1 = Rank_2, Rank_2 = Rank_1,
    Pts_1 = Pts_2, Pts_2 = Pts_1,
    Odd_1 = Odd_2, Odd_2 = Odd_1
  )

# bind_rows() matches columns by name, so the renamed columns line up.
atp_combined <- bind_rows(atp_table, atp_table_swapped) %>%
  arrange(Date)

# Count how often each opponent faced `player_name`.
opponents <- atp_combined %>%
  filter(Player_1 == player_name) %>%
  select(Player_2) %>%
  table(dnn = "name")
opponents <- as.data.frame(opponents)

# Most frequent opponents first.
opponents <- opponents[rev(order(opponents$Freq)), ]

fig <- plot_ly(opponents, x = ~name, y = ~Freq, type = "bar")
fig
# Collect, for each selected player, the rows of atp_combined where that
# player is Player_1, keeping only the rank, date and name columns.
players <- c("Djokovic N.", "Nadal R.")
player_results <- list()
for (player in players) {
  atp_player_result <- select(
    filter(atp_combined, Player_1 == player),
    Rank_1, Rank_2, Date, Player_1, Player_2
  )
  # append this player's table to the end of the list
  player_results <- c(player_results, list(atp_player_result))
}

# One line per player of Rank_1 against Date; the y axis is reversed so that
# rank 1 is drawn at the top.
ggplot(
  data = bind_rows(player_results, .id = "data_frame"),
  mapping = aes(x = Date, y = Rank_1, group = Player_1)
) +
  geom_line(mapping = aes(color = Player_1)) +
  scale_y_reverse() +
  labs(title = "Player Ranking over Time", y = "Ranking")
library(ggplot2)
# Bar chart of the ten most frequent opponents, tallest bar first.
# A geom layer is required: ggplot() with only aes() draws an empty panel.
# reorder() sorts the bars by descending frequency instead of by factor level;
# the x label is set explicitly so it stays "name".
ggplot(head(opponents, 10), aes(x = reorder(name, -Freq), y = Freq)) +
  geom_col() +
  labs(x = "name")
library(shiny)
# Frequency table of every player name appearing in either player column,
# with columns `name` and `freq`. Plain `<-` is used: this runs at top level,
# so the superassignment operator is unnecessary.
player_selections <- data.frame(
  player = c(atp_table$Player_1, atp_table$Player_2)
) %>%
  # used https://www.rdocumentation.org/packages/base/versions/3.6.2/topics/table
  # need the column name to be the same every time
  table(dnn = list("name")) %>%
  as.data.frame(responseName = "freq")
# https://stackoverflow.com/questions/62716572/selectinput-category-selection
# Stéphane Laurent
# https://shiny.posit.co/r/reference/shiny/latest/updateselectinput
# for context (lets me know onInitialize is running as JavaScript)
# JavaScript callback handed to selectize: clicking an option-group header
# adds every option of that group to the current selection.
onInitialize <- "
function(){
var select = this.$input[0];
this.$dropdown_content.on('mousedown', function(e){
e.preventDefault();
return false;
}).on('click', '.optgroup-header', function(e){
var options = $(this).parent().find('.option');
var items = [];
options.each(function(i, opt){items.push($(opt).data('value'));});
var selections = select.selectize.items;
select.selectize.setValue(items.concat(selections));
});
}
"
# Minimal app: a multi-select player picker and an empty server function.
# NOTE(review): the whole frequency table is passed as `choices`, so the
# `freq` column is presumably offered as selectable values alongside `name`;
# confirm whether only `player_selections$name` is intended.
shinyApp(
  ui = fluidPage(
    selectizeInput(
      "state", "Choose a player:",
      player_selections,
      multiple = TRUE,
      options = list(
        onInitialize = I(onInitialize)
      )
    )
  ),
  server = function(input, output) {}
)
Listening on http://127.0.0.1:5536
NA
library(GGally)
library(igraph)
# Star-shaped opponent network: one edge from `player` to every opponent,
# weighted by the number of matches between them.
player <- "Djokovic N."
atp_player_result <- atp_combined %>%
  filter(Player_1 == player) %>%
  # only the opponent names are needed
  select(Player_2)

# make a data frame with the frequency of occurrences
network_table_df <- atp_player_result %>%
  table(dnn = list("name")) %>%
  as.data.frame(responseName = "freq")

# order by frequency, most frequent opponent first
network_table_df <- network_table_df[
  order(network_table_df$freq, decreasing = TRUE),
]

simple_edge_df <- data.frame(
  from = rep(player, nrow(network_table_df)),
  to = network_table_df$name,
  weight = network_table_df$freq
)
net <- graph_from_data_frame(simple_edge_df)

# The former `opponent = to` argument is dropped: `to` is a column of the
# edge data frame, not an object in scope, so the call failed with
# "object 'to' not found".
p <- ggnet2(simplify(net), label = TRUE)
Error in list2(parse = parse, check_overlap = check_overlap, na.rm = na.rm, :
object 'to' not found
# Print the simplified graph to inspect its vertices and edges.
net %>% simplify()
IGRAPH 4cf45fe DNW- 176 175 --
+ attr: name (v/c), weight (e/n)
+ edges from 4cf45fe (vertex names):
[1] Djokovic N.->Nadal R. Djokovic N.->Federer R.
[3] Djokovic N.->Murray A. Djokovic N.->Nishikori K.
[5] Djokovic N.->Berdych T. Djokovic N.->Cilic M.
[7] Djokovic N.->Tsitsipas S. Djokovic N.->Wawrinka S.
[9] Djokovic N.->Medvedev D. Djokovic N.->Thiem D.
[11] Djokovic N.->Kohlschreiber P. Djokovic N.->Raonic M.
[13] Djokovic N.->Del Potro J.M. Djokovic N.->Dimitrov G.
[15] Djokovic N.->Monfils G. Djokovic N.->Bautista Agut R.
+ ... omitted several edges
# The player's name repeated once per row of atp_player_result.
rep(player, times = nrow(atp_player_result))
Let's see if we can make
# Load the older tennis data set from the CSV in the working directory.
old_atp_df <- readr::read_csv(file = "Tennis Data.csv")
Warning: One or more parsing issues, call `problems()` on your data frame for details, e.g.:
dat <- vroom(...)
problems(dat)Rows: 46652 Columns: 54── Column specification ───────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Location, Tournament, Date, Series, Court, Surface, Round, Winner, Loser, LRank, Comm...
dbl (42): ATP, Best of, WRank, W1, L1, W2, L2, W3, L3, W4, L4, W5, L5, Wsets, Lsets, CBW, CBL, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Peek at the first rows of the older data set.
old_atp_df %>% head()
library(dplyr)
# Plot the tournament locations from the older data set on a world map.

# One row per distinct tournament in the older data. This definition must be
# live: the lookups, the map and the merge below all depend on it.
old_tournament_locations <- old_atp_df[!duplicated(old_atp_df$Tournament), ]

# Distinct tournament names in the newer data, printed for comparison.
new_tournaments <- atp_table[!duplicated(atp_table$Tournament), "Tournament"]
#new_tournaments$Tournament %in%old_tournament_locations$Tournament
#old_tournament_locations$Tournament %in% new_tournaments$Tournament
new_tournaments

library(maps)
#map.cities(old_tournament_locations$Location)
# Show only the first rows; the full city table is far too long to print.
head(world.cities)
#old_tournament_locations$Location[which(old_tournament_locations$Location %in% world.cities$name)]

# Cities whose name matches a tournament Location.
# NOTE(review): matching is by name only, so presumably same-named cities in
# different countries are all kept; confirm whether that is acceptable.
locations <- world.cities[
  world.cities$name %in% old_tournament_locations$Location,
]
locations

# Hover text and colour use columns that exist in world.cities
# (name, country.etc, pop) rather than airport/city/state/cnt.
fig <- plot_geo(locations, lat = ~lat, lon = ~long)
fig <- fig %>% add_markers(
  text = ~paste(name, country.etc, paste("Population:", pop), sep = "<br />"),
  color = ~pop, symbol = I("square"), size = I(8), hoverinfo = "text"
)
fig

# Attach the city details to each tournament row by city name.
result <- merge(
  old_tournament_locations, world.cities,
  by.x = "Location", by.y = "name"
)
#with(world.data, lat[match(old_tournament_locations$Location, name])
# Opponent network for one player restricted to a single tournament (the
# tournament of the first row of atp_combined), rendered interactively.
player <- "Djokovic N."
tournament <- atp_combined$Tournament[1]

# Opponents the player met at that tournament.
atp_player_result <- select(
  filter(atp_combined, Player_1 == player, Tournament == tournament),
  Player_2
)
atp_player_result

# Matches per opponent, most frequent first.
network_table_df <- as.data.frame(
  table(atp_player_result, dnn = list("name")),
  responseName = "freq"
)
network_table_df <- network_table_df[
  order(network_table_df$freq, decreasing = TRUE),
]

# One weighted edge from the player to each opponent.
edge_df <- data.frame(
  from = rep(player, times = nrow(network_table_df)),
  to = network_table_df$name,
  weight = network_table_df$freq
)
net <- graph_from_data_frame(edge_df)

# Draw the network, convert it to plotly and hide both axes.
p <- ggnet2(simplify(net), size = 3, label = TRUE)
p <- layout(
  ggplotly(p),
  xaxis = list(visible = FALSE),
  yaxis = list(visible = FALSE)
)
p
NA
old_atp_df$